#!/usr/bin/env python

""" MultiQC module to parse output from HTSeq Count """

from __future__ import print_function
from collections import OrderedDict
import logging

from multiqc import config
from multiqc.plots import bargraph
from multiqc.modules.base_module import BaseMultiqcModule

# Initialise the logger
log = logging.getLogger(__name__)

class MultiqcModule(BaseMultiqcModule):
    """ MultiQC module for HTSeq Count output.

    Collects every file matched by the 'htseq' search pattern, parses the
    per-sample assignment counters, and adds two columns to the general
    statistics table plus one bar plot section to the report. """

    def __init__(self):
        """ Find and parse all HTSeq Count reports and build the report content.

        Raises:
            UserWarning: when no sample is left after parsing and filtering
                (nothing to report for this module).
        """

        # Initialise the parent object
        super(MultiqcModule, self).__init__(name='HTSeq Count',
        anchor='htseq', target='HTSeq Count',
        href='http://www-huber.embl.de/HTSeq/doc/count.html',
        info=" is part of the HTSeq Python package - it takes a file with aligned sequencing "\
             "reads, plus a list of genomic features and counts how many reads map to each feature.")

        # Find and load any HTSeq Count reports
        self.htseq_data = dict()
        self.htseq_keys = list()
        for f in self.find_log_files('htseq', filehandles=True):
            parsed_data = self.parse_htseq_report(f)
            if parsed_data is None:
                continue
            # Later files win, but leave a trace so clashes can be diagnosed
            if f['s_name'] in self.htseq_data:
                log.debug("Duplicate sample name found! Overwriting: {}".format(f['s_name']))
            self.htseq_data[f['s_name']] = parsed_data

        # Filter to strip out ignored sample names
        self.htseq_data = self.ignore_samples(self.htseq_data)

        if not self.htseq_data:
            raise UserWarning

        log.info("Found {} reports".format(len(self.htseq_data)))

        # Write parsed report data to a file
        self.write_data_file(self.htseq_data, 'multiqc_htseq')

        # Basic Stats Table
        self.htseq_stats_table()

        # Assignment bar plot
        self.add_section( plot = self.htseq_counts_chart() )


    def parse_htseq_report (self, f):
        """ Parse the HTSeq Count log file.

        Args:
            f: dict describing the file; f['f'] is iterated line by line and
               each line is split on tab characters.

        Returns:
            A dict with one entry per special counter found (the leading
            '__' is stripped from the name), plus 'assigned' (sum of the last
            column of every other line), 'total_count' (assigned + special
            counters) and 'percent_assigned'. Returns None when the file
            contains none of the special counters, i.e. it does not look like
            HTSeq Count output.
        """
        special_counters = frozenset([
            '__no_feature',
            '__ambiguous',
            '__too_low_aQual',
            '__not_aligned',
            '__alignment_not_unique',
        ])
        parsed_data = dict()
        assigned_counts = 0
        for line in f['f']:
            fields = line.split("\t")
            # A count record needs at least a name column and a count column
            if len(fields) < 2:
                continue
            # The count is always taken from the last column; skip lines
            # (headers, truncated records) where it is not an integer rather
            # than aborting the whole report.
            try:
                count = int(fields[-1])
            except ValueError:
                continue
            name = fields[0]
            if name in special_counters:
                parsed_data[name[2:]] = count
            else:
                assigned_counts += count

        # Without any special counter this is not an HTSeq Count report
        if not parsed_data:
            return None

        parsed_data['assigned'] = assigned_counts
        # At this point the dict holds only the special counters + 'assigned'
        total_count = sum(parsed_data.values())
        parsed_data['total_count'] = total_count
        if total_count:
            parsed_data['percent_assigned'] = (float(assigned_counts) / float(total_count)) * 100.0
        else:
            parsed_data['percent_assigned'] = 0.0
        return parsed_data


    def htseq_stats_table(self):
        """ Take the parsed stats from the HTSeq Count report and add them to the
        basic stats table at the top of the report """

        headers = OrderedDict()
        headers['percent_assigned'] = {
            'title': '% Assigned',
            'description': '% Assigned reads',
            'max': 100,
            'min': 0,
            'suffix': '%',
            'scale': 'RdYlGn'
        }
        headers['assigned'] = {
            'title': '{} Assigned'.format(config.read_count_prefix),
            'description': 'Assigned Reads ({})'.format(config.read_count_desc),
            'min': 0,
            'scale': 'PuBu',
            # Scale raw read counts by the globally configured multiplier
            'modify': lambda x: float(x) * config.read_count_multiplier,
            'shared_key': 'read_count'
        }
        self.general_stats_addcols(self.htseq_data, headers)


    def htseq_counts_chart (self):
        """ Make the HTSeq Count assignment rates plot.

        Returns whatever bargraph.plot() returns for the parsed data. """
        # Insertion order here is the category order passed to the plot
        cats = OrderedDict()
        cats['assigned'] =      { 'name': 'Assigned' }
        cats['ambiguous'] =     { 'name': 'Ambiguous' }
        cats['alignment_not_unique'] = { 'name': 'Alignment Not Unique' }
        cats['no_feature'] =    { 'name': 'No Feature' }
        cats['too_low_aQual'] = { 'name': 'Too Low aQual' }
        cats['not_aligned'] =   { 'name': 'Not Aligned' }
        # Named pconfig so it does not shadow the module-level multiqc config
        pconfig = {
            'id': 'htseq_assignment_plot',
            'title': 'HTSeq: Count Assignments',
            'ylab': '# Reads',
            'hide_zero_cats': False,
            'cpswitch_counts_label': 'Number of Reads'
        }
        return bargraph.plot(self.htseq_data, cats, pconfig)
